from lxml import etree

# Sample HTML fragment used as input for the XPath queries below.
# NOTE: the final <li> has no closing tag in this sample.
data = """
<div>
  <ul>
       <li class="item-0"><a href="link1.html">first item</a></li>
       <li class="item-1"><a href="link2.html">second item</a></li>
       <li class="item-inactive"><a href="link3.html"><span class="bold">third item</span></a></li>
       <li class="item-1"><a href="link4.html">fourth item</a></li>
       <li class="item-0"><a href="link5.html">fifth item</a>
   </ul>
</div>
"""

# Parse the fragment; the resulting tree has <html> and <body> tags added.
# To inspect the parsed document, run: print(etree.tostring(html).decode())
html = etree.HTML(data)

# XPath expressions evaluated against the parsed tree, in output order.
_XPATH_QUERIES = (
    "//li/a/text()",                # text of every <a> directly under an <li>
    "//li/@class",                  # class attribute of every <li>
    "//li/a[@href='link1.html']",   # <a> elements whose href is link1.html
    "//li[1]/a[1]",                 # first <a> of each <li> that is a first <li> child
    "//li//span",                   # every <span> nested anywhere under an <li>
    "//li[last()]/a/@href",         # href of the <a> in the last <li>
)

# Print the raw result list of each query, one per line.
for _expression in _XPATH_QUERIES:
    print(html.xpath(_expression))
